# daily: daily summary of rides
# durham_voters: one row per voter
daily <- read_csv("data/daily.csv")
durham_voters <- read_csv("data/durham_voters.csv")
Exercise: Which of the four datasets does this visualization use? Determine which variable of the dataset is mapped to which aesthetic (x-axis, y-axis, etc.) of the plot.
# Data only: no aesthetics and no layers yet.
ggplot(daily)

# Map ride_date and n_rides to the x and y positions; still no layer.
ggplot(daily, aes(x = ride_date, y = n_rides))

# Add a point layer on top of the mapped axes.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_point()

# Additionally map day_of_week to point colour.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point()
# Smoothed trend per day of week; no method is given, so geom_smooth()
# reports the one it chose (see the message below).
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Same plot with the smoothing method stated explicitly.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(method = "loess")

# Turn off the standard-error ribbon.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(method = "loess", se = FALSE)

# Use the discrete viridis palette for the colour scale.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d()

# Apply the minimal theme.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(method = "loess", se = FALSE) +
  scale_color_viridis_d() +
  theme_minimal()

# Finished version: axis, legend, title and subtitle labels.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(se = FALSE, method = "loess") +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Ride date",
    y = "Number of rides",
    color = "Day of week",
    title = "Daily rides",
    subtitle = "Durham, NC"
  )
# Map the number of riders to point size.
ggplot(daily, aes(x = ride_date, y = n_rides, size = n_riders)) +
  geom_point()

# Same mapping, with every point drawn half-transparent.
ggplot(daily, aes(x = ride_date, y = n_rides, size = n_riders)) +
  geom_point(alpha = 0.5)
Exercise: Using information from https://ggplot2.tidyverse.org/articles/ggplot2-specs.html, add color, size, alpha, and shape aesthetics to your graph. Experiment. Do different things happen when you map aesthetics to discrete and continuous variables? What happens when you use more than one aesthetic?
# Loess trend of daily rides per day of week, with viridis colours,
# the minimal theme and descriptive labels.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(se = FALSE, method = "loess") +
  scale_color_viridis_d() +
  theme_minimal() +
  labs(
    x = "Ride date",
    y = "Number of rides",
    color = "Day of week",
    title = "Daily rides",
    subtitle = "Durham, NC"
  )
# Mapping at the geom level: the aesthetics are given to geom_point()
# rather than to ggplot().
ggplot(data = daily) +
  geom_point(mapping = aes(x = ride_date, y = n_rides))

# Different mappings for different geoms: x and y are shared by both layers,
# while color is mapped only in the smooth layer.
ggplot(data = daily, mapping = aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_smooth(aes(color = day_of_week), method = "loess", se = FALSE)

# Mapping with aes(): color follows the day_of_week variable.
ggplot(data = daily,
       mapping = aes(x = ride_date,
                     y = n_rides,
                     color = day_of_week)) +
  geom_point()

# Setting outside aes(): color is the constant "red" for every point.
ggplot(data = daily,
       mapping = aes(x = ride_date,
                     y = n_rides)) +
  geom_point(color = "red")
# The data can be piped into ggplot() ...
daily %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()

# ... or passed as its first argument.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_point()
Color by weekday / weekend
# Map a logical expression (is the day Sat or Sun?) straight to colour.
ggplot(
  daily,
  aes(x = ride_date, y = n_rides, color = day_of_week %in% c("Sat", "Sun"))
) +
  geom_point()

# Same plot with the colour legend titled "Weekend".
ggplot(
  daily,
  aes(x = ride_date, y = n_rides, color = day_of_week %in% c("Sat", "Sun"))
) +
  geom_point() +
  labs(color = "Weekend")

# Alternative: build a labelled day_type column first, then map it.
daily %>%
  mutate(
    day_type = if_else(day_of_week %in% c("Sat", "Sun"), "Weekend", "Weekday")
  ) %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_type)) +
  geom_point()
# "blue" inside aes() is mapped like data (a constant variable),
# not used as the literal colour of the points.
ggplot(daily) +
  geom_point(aes(x = ride_date, y = n_rides, color = "blue"))

# "blue" outside aes() sets the colour of every point.
ggplot(daily) +
  geom_point(aes(x = ride_date, y = n_rides), color = "blue")
Exercise: What is wrong with the following?
# Exercise code, intentionally left broken: the ggplot() call is followed
# by %>% instead of + before geom_point().
daily %>%
mutate(day_type = if_else(day_of_week %in% c("Sat", "Sun"),
"Weekend",
"Weekday")) %>%
ggplot(aes(x = ride_date, y = n_rides, color = day_type)) %>%
geom_point()
What is wrong with the following?
# Same intentionally broken code, shown with the error it produces:
# the plot is piped (%>%) into geom_point() instead of being added with +.
daily %>%
mutate(day_type = if_else(day_of_week %in% c("Sat", "Sun"),
"Weekend",
"Weekday")) %>%
ggplot(aes(x = ride_date, y = n_rides, color = day_type)) %>%
geom_point()
## Error: `mapping` must be created by `aes()`
## Did you use %>% instead of +?
# Points only.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_point()

# Points joined by a line.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_line()

# Points with a smoother on top.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_smooth(span = 0.1) # try changing span
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points and lines coloured by day of week.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  geom_line()

# One smoother per day of week, without the standard-error ribbon.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(span = 0.2, se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Draw large gray points for non-weekend days with fewer than 200 rides,
# then draw the regular coloured points over them.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(
    data = filter(
      daily,
      !(day_of_week %in% c("Sat", "Sun")) & n_rides < 200
    ),
    size = 5,
    color = "gray"
  ) +
  geom_point()
Exercise: Work with your neighbor to sketch what the following plot will look like. No cheating! Do not run the code, just think through the code for the time being.
# Non-weekend days with fewer than 100 rides.
low_weekdays <- filter(
  daily,
  !(day_of_week %in% c("Sat", "Sun")),
  n_rides < 100
)
low_weekdays
## # A tibble: 9 x 7
## ride_date day_of_week month n_rides n_riders n_unique_stops
## <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2015-01-01 Thurs Jan 58 37 44
## 2 2015-01-26 Mon Jan 58 52 15
## 3 2015-01-28 Wed Jan 79 65 11
## 4 2015-01-30 Fri Jan 25 25 12
## 5 2015-02-03 Tues Feb 2 2 2
## 6 2015-02-17 Tues Feb 46 34 33
## 7 2015-02-26 Thurs Feb 30 22 22
## 8 2015-05-25 Mon May 99 55 66
## 9 2015-12-25 Fri Dec 1 1 1
## # … with 1 more variable: n_unique_routes <dbl>
# Baseline scatter plot.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point()

# Gray highlight layer added after the coloured points.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  geom_point(data = low_weekdays, size = 5, color = "gray")

# Gray highlight layer added before the coloured points.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(data = low_weekdays, size = 5, color = "gray") +
  geom_point()

# Label the highlighted days with their date, placed at the point itself.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(data = low_weekdays, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = low_weekdays,
    aes(y = n_rides, label = ride_date),
    size = 2,
    color = "black"
  )

# Shift the labels up by 15 rides.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(data = low_weekdays, size = 5, color = "gray") +
  geom_point() +
  geom_text(
    data = low_weekdays,
    aes(y = n_rides + 15, label = ride_date),
    size = 2,
    color = "black"
  )

# Use geom_text_repel() for the labels instead of a manual offset.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(data = low_weekdays, size = 5, color = "gray") +
  geom_point() +
  geom_text_repel(
    data = low_weekdays,
    aes(x = ride_date, y = n_rides, label = as.character(ride_date)),
    size = 3,
    color = "black"
  )

# Same idea with geom_label_repel().
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(data = low_weekdays, size = 5, color = "gray") +
  geom_point() +
  geom_label_repel(
    data = low_weekdays,
    aes(x = ride_date, y = n_rides, label = as.character(ride_date)),
    size = 2,
    color = "black"
  )
Exercise: How would you fix the following plot?
# Exercise starting point, deliberately left unfixed: color is mapped to
# day_of_week in aes() but also set to the constant "blue" in geom_smooth().
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
geom_smooth(color = "blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Add two labels to each row and move them to the front of the table:
#   day  - "Weekend" for Sat/Sun, otherwise "Weekday"
#   temp - "Cooler" for Jan through Jun, otherwise "Warmer"
weekend_days <- c("Sat", "Sun")
cooler_months <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun")
daily <- daily %>%
  mutate(
    day = if_else(day_of_week %in% weekend_days, "Weekend", "Weekday"),
    temp = if_else(month %in% cooler_months, "Cooler", "Warmer")
  ) %>%
  select(day, temp, everything())
daily
## # A tibble: 364 x 9
## day temp ride_date day_of_week month n_rides n_riders n_unique_stops
## <chr> <chr> <date> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Week… Cool… 2015-01-01 Thurs Jan 58 37 44
## 2 Week… Cool… 2015-01-02 Fri Jan 134 83 93
## 3 Week… Cool… 2015-01-03 Sat Jan 145 84 100
## 4 Week… Cool… 2015-01-04 Sun Jan 101 57 63
## 5 Week… Cool… 2015-01-05 Mon Jan 182 117 109
## 6 Week… Cool… 2015-01-06 Tues Jan 267 138 146
## 7 Week… Cool… 2015-01-07 Wed Jan 243 157 129
## 8 Week… Cool… 2015-01-08 Thurs Jan 235 154 141
## 9 Week… Cool… 2015-01-09 Fri Jan 268 173 147
## 10 Week… Cool… 2015-01-10 Sat Jan 198 114 116
## # … with 354 more rows, and 1 more variable: n_unique_routes <dbl>
# One panel per value of day.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_line() +
  facet_wrap(~ day)

# Grid of panels: temp in rows, day in columns.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_line() +
  facet_grid(temp ~ day)

# Transposed grid: day in rows, temp in columns.
ggplot(daily, aes(x = ride_date, y = n_rides)) +
  geom_line() +
  facet_grid(day ~ temp)
# Show only the three demographic columns of the voter table.
select(durham_voters, race_code, gender_code, age)
## # A tibble: 204,063 x 3
## race_code gender_code age
## <chr> <chr> <chr>
## 1 I M Age Over 66
## 2 U U Age 18 - 25
## 3 O F Age 41 - 65
## 4 W F Age 41 - 65
## 5 W M Age 41 - 65
## 6 B M Age 26 - 40
## 7 W F Age 41 - 65
## 8 W M Age 26 - 40
## 9 B F Age 41 - 65
## 10 B M Age 41 - 65
## # … with 204,053 more rows
# Per race / gender / age group: count all voters and those whose
# party is "REP".
durham_voters %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == "REP")
  )
## # A tibble: 92 x 5
## # Groups: race_code, gender_code [21]
## race_code gender_code age n_voters n_rep
## <chr> <chr> <chr> <int> <int>
## 1 A F Age < 18 Or Invalid Birth Date 2 0
## 2 A F Age 18 - 25 751 35
## 3 A F Age 26 - 40 1086 64
## 4 A F Age 41 - 65 727 75
## 5 A F Age Over 66 170 36
## 6 A M Age 18 - 25 635 42
## 7 A M Age 26 - 40 919 64
## 8 A M Age 41 - 65 572 61
## 9 A M Age Over 66 175 33
## 10 A U Age 18 - 25 8 1
## # … with 82 more rows
# Voter counts (all voters, and voters whose party is "REP") per
# race / gender / age group, restricted to gender codes F and M, race codes
# W, B and A, and excluding the "Age < 18 Or Invalid Birth Date" category.
durham_voters_summary <- durham_voters %>%
  group_by(race_code, gender_code, age) %>%
  summarize(n_all_voters = n(), n_rep_voters = sum(party == "REP")) %>%
  # summarize() removes only the last grouping level; drop the rest so that
  # later verbs on this table work on the whole table, not per group.
  ungroup() %>%
  filter(
    gender_code %in% c("F", "M"),
    race_code %in% c("W", "B", "A"),
    age != "Age < 18 Or Invalid Birth Date"
  )
# Bar height is the precomputed voter count; panels share one y scale.
ggplot(durham_voters_summary, aes(x = age, y = n_all_voters)) +
  geom_col() +
  facet_grid(race_code ~ gender_code)

# Same bars, letting each row of panels have its own y scale.
ggplot(durham_voters_summary, aes(x = age, y = n_all_voters)) +
  geom_col() +
  facet_grid(race_code ~ gender_code, scales = "free_y")
Using new tidyr function: pivot_longer()
# Reshape the two count columns into long form (voter_type / n) and plot
# both counts per age category, one panel per race and gender.
durham_voters_summary %>%
  tidyr::pivot_longer(
    cols = starts_with("n_"),
    names_to = "voter_type",
    values_to = "n",
    names_prefix = "n_"
  ) %>%
  ggplot(aes(x = age, y = n, color = voter_type)) +
  geom_point() +
  # age is discrete, so the line needs an explicit group to connect points
  # across age categories; this replaces a numeric copy of age as x.
  geom_line(aes(group = voter_type)) +
  facet_grid(race_code ~ gender_code, scales = "free_y") +
  expand_limits(y = 0)
# Reversed y axis.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_reverse()

# Square-root y axis.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_sqrt()

# Hand-picked y-axis breaks.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_continuous(breaks = c(0, 200, 500))

# Black-and-white theme.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_bw()

# Dark theme.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_dark()

# Rotate the x-axis tick labels by 90 degrees.
ggplot(daily, aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))
Exercise: Fix the axis labels in the following plot so they are at a 45-degree angle.
# Exercise starting point: voter counts per age category, one panel per
# race and gender, with a free y scale per row of panels.
ggplot(durham_voters_summary, aes(x = age, y = n_all_voters)) +
  geom_col() +
  facet_grid(race_code ~ gender_code, scales = "free_y")